#R version 4.2.1 (2022-06-23)
#Platform: x86_64-w64-mingw32/x64 (64-bit)
#Running under: Windows 10 x64 (build 19044)

library(dplyr) #version 1.0.9
library(readr) #version 2.1.2
library(stringr) #version 1.4.0
library(ltm) #1.2-0 (attached with polycor_0.8-1 / msm_1.6.9 / MASS_7.3-57)

#generate IRT values for dependent variable -- bounded democratization

#gwf irt#####
gwf_base <- read_csv("data/all_regimes.csv") %>% 
  filter(dataset == "gwf") %>% 
  mutate_if(is.numeric, list(~na_if(., 99))) %>% 
  filter(!is.na(executive1)) %>% 
  mutate(nondem = ifelse(regime2 == 1 | regime3 == 1 | regime4 == 1 | regime5 == 1 | regime6 == 1, 1, 0)) %>% 
  mutate(closed = ifelse(regime2 | regime5 == 1, 1, 0)) %>% 
  filter(closed == 0) %>% 
  dplyr::select(case, transition, contains(c("executive", "legislature", "election", "transitionary")), -executive5, -transitionary4) %>% 
  unique()
  
gwf_irt <- dplyr::select(gwf_base, contains(c("executive", "legislature", "election", "transitionary"))) 

gwf_fit <- ltm(gwf_irt ~ z1)

gwf_lrt <- factor.scores(gwf_fit,  resp.patterns = gwf_irt)$score.dat

gwf_alpha <- descript(gwf_irt)$alpha
cronbach.alpha(gwf_irt, na.rm = T)

gwf_base <- gwf_base %>% 
  mutate(gwf_irt = (gwf_lrt$z1 - min(gwf_lrt$z1))/(max(gwf_lrt$z1)-min(gwf_lrt$z1))) %>% 
  write_csv("data/gwf_irt.csv")


#svolik irt#####
svolik_base <- read_csv("data/all_regimes.csv") %>% 
  filter(dataset == "svolik") %>% 
  mutate_if(is.numeric, list(~na_if(., 99))) %>% 
  filter(!is.na(executive1)) %>% 
  mutate(nondem = ifelse(regime2 == 1 | regime3 == 1 | regime4 == 1 | regime5 == 1 | regime6 == 1, 1, 0)) %>% 
  mutate(closed = ifelse(regime2 | regime5 == 1, 1, 0)) %>% 
  filter(closed == 0) %>% 
  dplyr::select(case, transition, contains(c("executive", "legislature", "election", "transitionary")), -executive5, -transitionary4) %>% 
  unique()

svolik_irt <- dplyr::select(svolik_base, contains(c("executive", "legislature", "election", "transitionary"))) 

svolik_fit <- ltm(svolik_irt ~ z1)

svolik_lrt <- factor.scores(svolik_fit,  resp.patterns = svolik_irt)$score.dat

svolik_alpha <- descript(svolik_irt)$alpha
cronbach.alpha(svolik_irt, na.rm = T)

svolik_base <- svolik_base %>% 
  mutate(svolik_irt = (svolik_lrt$z1 - min(svolik_lrt$z1))/(max(svolik_lrt$z1)-min(svolik_lrt$z1))) %>% 
  write_csv("data/svolik_irt.csv")


#dd irt#####
dd_base <- read_csv("data/all_regimes.csv") %>% 
  filter(dataset == "dd") %>% 
  mutate_if(is.numeric, list(~na_if(., 99))) %>% 
  filter(!is.na(executive1)) %>% 
  mutate(nondem = ifelse(regime2 == 1 | regime3 == 1 | regime4 == 1 | regime5 == 1 | regime6 == 1, 1, 0)) %>% 
  mutate(closed = ifelse(regime2 | regime5 == 1, 1, 0)) %>% 
  filter(closed == 0) %>% 
  dplyr::select(case, transition, contains(c("executive", "legislature", "election", "transitionary")), -executive5, -transitionary4) %>% 
  unique()

dd_irt <- dplyr::select(dd_base, contains(c("executive", "legislature", "election", "transitionary"))) 

dd_fit <- ltm(dd_irt ~ z1)

dd_lrt <- factor.scores(dd_fit,  resp.patterns = dd_irt)$score.dat

dd_alpha <- descript(dd_irt)$alpha
cronbach.alpha(dd_irt, na.rm = T)

dd_base <- dd_base %>% 
  mutate(dd_irt = (dd_lrt$z1 - min(dd_lrt$z1))/(max(dd_lrt$z1)-min(dd_lrt$z1))) %>% 
  write_csv("data/dd_irt.csv")

#wth irt#####
wth_base <- read_csv("data/all_regimes.csv") %>% 
  filter(dataset == "wth") %>% 
  mutate_if(is.numeric, list(~na_if(., 99))) %>% 
  filter(!is.na(executive1)) %>% 
  mutate(nondem = ifelse(regime2 == 1 | regime3 == 1 | regime4 == 1 | regime5 == 1 | regime6 == 1, 1, 0)) %>% 
  mutate(closed = ifelse(regime2 | regime5 == 1, 1, 0)) %>% 
  filter(closed == 0) %>% 
  dplyr::select(case, transition, contains(c("executive", "legislature", "election", "transitionary")), -executive5, -transitionary4) %>% 
  unique()

wth_irt <- dplyr::select(wth_base, contains(c("executive", "legislature", "election", "transitionary"))) 

wth_fit <- ltm(wth_irt ~ z1)

wth_lrt <- factor.scores(wth_fit,  resp.patterns = wth_irt)$score.dat

wth_alpha <- descript(wth_irt)$alpha
cronbach.alpha(wth_irt, na.rm = T)

wth_base <- wth_base %>% 
  mutate(wth_irt = (wth_lrt$z1 - min(wth_lrt$z1))/(max(wth_lrt$z1)-min(wth_lrt$z1))) %>% 
  write_csv("data/wth_irt.csv")

#unload packages so dplyr works again
detach("package:ltm", unload=TRUE)
detach("package:margins", unload=TRUE)
detach("package:MASS", unload=TRUE)
detach("package:msm", unload=TRUE)
detach("package:polycor", unload=TRUE)

#validation####

#cronbach's alphas to produce Table 3
alphas <- as.data.frame(cbind(gwf_alpha, svolik_alpha, dd_alpha, wth_alpha)) %>% 
  add_rownames(var = "rowname") %>% 
  mutate(rowname = str_remove(rowname, "Excluding")) %>%  na.omit() %>% 
  mutate_if(is.numeric, ~round(., 3)) %>% 
  filter(rowname != "All Items")
colnames(alphas) <- c("Variable", "GWF", "PAR", "DD", "WTH")

stargazer(alphas, summary = FALSE, rownames = F)

#summary means
gwf <- summarize_all(gwf_base, list(~mean(., na.rm = T))) %>% 
  mutate_if(is.numeric, ~round(., 3)) %>% t() 

par <- summarize_all(svolik_base, list(~mean(., na.rm = T))) %>% 
  mutate_if(is.numeric, ~round(., 3)) %>% t() 

dd <- summarize_all(dd_base, list(~mean(., na.rm = T))) %>% 
  mutate_if(is.numeric, ~round(., 3)) %>% t() 

wth <- summarize_all(wth_base, list(~mean(., na.rm = T))) %>% 
  mutate_if(is.numeric, ~round(., 3)) %>% t() 

mean_all <- cbind(gwf, par, dd, wth)
colnames(mean_all) <- c("GWF", "PAR", "DD", "WTH")

stargazer(mean_all, summary = F)
             
#summary sd devs
gwf <- summarize_all(gwf_base, list(~sd(., na.rm = T))) %>% 
  mutate_if(is.numeric, ~round(., 3)) %>% t() 

par <- summarize_all(svolik_base, list(~sd(., na.rm = T))) %>% 
  mutate_if(is.numeric, ~round(., 3)) %>% t() 

dd <- summarize_all(dd_base, list(~sd(., na.rm = T))) %>% 
  mutate_if(is.numeric, ~round(., 3)) %>% t() 

wth <- summarize_all(wth_base, list(~sd(., na.rm = T))) %>% 
  mutate_if(is.numeric, ~round(., 3)) %>% t() 

sd_all <- cbind(gwf, par, dd, wth)
colnames(sd_all) <- c("GWF", "PAR", "DD", "WTH")

stargazer(sd_all, summary = F)



